*Write a permanent log file;
PROC PRINTTO LOG="C:\Research\Alcohol and Morbidity\7. Code Archive\Data Files and Code That Produced Them\P05 Inpatient Code up Visits AZ.log" NEW;
RUN;

*Write a permanent list file;
PROC PRINTTO PRINT="C:\Research\Alcohol and Morbidity\7. Code Archive\Data Files and Code That Produced Them\P05 Inpatient Code up Visits AZ.lst" NEW;
RUN;

/*
Recode Arizona hospital data 1990-2009

*/

libname AZ_INP "D:\Data\Hospital Inpatient Records\Hospital Arizona (1990-2009)";


*Variables to keep;
%let keep_v1 = AGE  DX1 PAY1 TOTCHG;


%macro pull_az(lib,file,keep_v2,keep_v3,keep_v4,keep_v5,keep_v6,keep_v7,ln);
data &file. (drop = &keep_v4. &keep_v6.);
   format state $2. file_year 8. file $17.;
   set &lib..&file. (keep = &keep_v1. &keep_v2. &keep_v3. &keep_v4. &keep_v5. &keep_v6. &keep_v7.);
  *Record the file name and state postal code; 
   file = "&file.";
   state = substr("&lib.",1,2);
   file_year = substr("&file.",9,4);
  *Compute age in months relative to age 21 - the intck function counts the number of starting 
   points of the interval crossed. So an ED visit occurring in the birth month will be 
   have a months_21 value of 0;
   if substr("&file.",9,4) in (1995,1996,1997) then do;
      months_21 = intck('month',mdy(month(dob),14,year(dob)+21),mdy(month(ADATE),15,year(ADATE)));
	  ad_date = mdy(month(ADATE),15,year(ADATE));;
   end; 
   if substr("&file.",9,4) not in (1995,1996,1997) then do;
      months_21 = intck('month',mdy(bmonth,14,byear+21),mdy(amonth,15,ayear));
	  ad_date = mdy(amonth,15,ayear);
   end;
   if months_21 = . then bad_date = 1; else bad_date = 0;
  *Flag for ED admission on inpatient in some states they switch from one source coding to 
   another in 2007. In 2007 the rates dip in those states suggesting a slippage in coding is occurring;
   from_ED = 0;
   if asource = "1" then from_ED = 1; 
   if &keep_v7. = "7" then from_ED = 1;
  *Code up discharge;
   if &keep_v6. = 20 then died = 1; else died = 0;
  *Flag for pregnant;
   if substr(DX1,1,2) in ('63','64','65','66','67') then preg_related = 1; else preg_related = 0;
  *Restrict based on the age in months variable;
   male = 0;
   if substr("&file.",9,4) <= 1997 and &keep_v5. = 1 then male = 1; 
   if substr("&file.",9,4) > 1997 and &keep_v5. = 0 then male = 1; 
  *In a typical year gender is missing for one observation;
   if substr("&file.",9,4) <= 1997 and &keep_v5. = . then male = .; 
   if substr("&file.",9,4) > 1997 and &keep_v5. = . then male = .; 
   count = 1;
run;
*Check for bad dates and un-coded gender;
proc freq data = &file.;
   tables bad_date male;
   title "&File. Missing Exact Age";
run;
%mend;

*Keep all ecodes;

%pull_az(AZ_INP,az_sidc_1990_core,DX1,DX2-DX5,bmonth byear amonth ayear asource,sex,DISP,ayear,5);
%pull_az(AZ_INP,az_sidc_1991_core,DX1,DX2-DX5,bmonth byear amonth ayear asource,sex,DISP,ayear,5);
%pull_az(AZ_INP,az_sidc_1992_core,DX1,DX2-DX5,bmonth byear amonth ayear asource,sex,DISP,ayear,5);
%pull_az(AZ_INP,az_sidc_1993_core,DX1,DX2-DX5,bmonth byear amonth ayear asource,sex,DISP,ayear,5);
%pull_az(AZ_INP,az_sidc_1994_core,DX1,DX2-DX5,bmonth byear amonth ayear asource,sex,DISP,ayear,5);
%pull_az(AZ_INP,Az_sidc_1995_core_ext,DX1,DX2-DX11,DOB ADATE asource,sex,DISP,DOB,11);
%pull_az(AZ_INP,Az_sidc_1996_core_ext,DX1,DX2-DX11,DOB ADATE asource,sex,DISP,DOB,11);
%pull_az(AZ_INP,Az_sidc_1997_core_ext,DX1,DX2-DX11,DOB ADATE asource,sex,DISP,DOB,11);
%pull_az(AZ_INP,az_sidc_1998_core,DX1,DX2-DX11,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,11);
%pull_az(AZ_INP,az_sidc_1999_core,DX1,DX2-DX11,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,11);
%pull_az(AZ_INP,az_sidc_2000_core,DX1,DX2-DX11,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,11);
%pull_az(AZ_INP,az_sidc_2001_core,DX1,DX2-DX11,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,11);
%pull_az(AZ_INP,az_sidc_2002_core,DX1,DX2-DX11,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,11);
%pull_az(AZ_INP,az_sidc_2002_core,DX1,DX2-DX11,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,11);
%pull_az(AZ_INP,az_sidc_2003_core,ECODE1-ECODE6,DX2-DX9,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,9);
%pull_az(AZ_INP,az_sidc_2004_core,ECODE1-ECODE5,DX2-DX9,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,9);
%pull_az(AZ_INP,az_sidc_2005_core,ECODE1-ECODE5,DX2-DX9,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,9);
%pull_az(AZ_INP,az_sidc_2006_core,ECODE1-ECODE5,DX2-DX9,bmonth byear amonth ayear asource,female,DISPUNIFORM,ayear,9);
%pull_az(AZ_INP,az_sidc_2007_core,ECODE1-ECODE5,DX2-DX9,bmonth byear amonth ayear asource,female,DISPUNIFORM,PointOfOriginUB04,9);
%pull_az(AZ_INP,az_sidc_2008_core,ECODE1-ECODE8,DX2-DX25,bmonth byear amonth ayear,female,DISPUNIFORM,PointOfOriginUB04,25);
%pull_az(AZ_INP,az_sidc_2009_core,ECODE1-ECODE6,DX2-DX25,bmonth byear amonth ayear,female,DISPUNIFORM,PointOfOriginUB04,25);

                  
*Combine files;
data Arizona_inp (drop = i j e_dx1-e_dx8);
   format ad_date date.;
   set az_sidc_1990_core 		az_sidc_1991_core 		az_sidc_1992_core 		az_sidc_1993_core 	az_sidc_1994_core 
       Az_sidc_1995_core_ext 	Az_sidc_1996_core_ext 	az_sidc_1997_core_ext 	az_sidc_1998_core 	az_sidc_1999_core 
       az_sidc_2000_core 		az_sidc_2001_core 		az_sidc_2002_core 		az_sidc_2003_core 	az_sidc_2004_core 
       az_sidc_2005_core 		az_sidc_2006_core 		az_sidc_2007_core 		az_sidc_2008_core 	az_sidc_2009_core;
  *Only 418 of 11,863,244 have bad dates;
   if bad_date = 1 then delete;
  *Restricting to the right age reduces the sample to 884,474;
   if months_21 < -40 or months_21 > 39 then delete;
  *Dropping admissions that started before 1990 reduces the sample to 884,194;
   if state = 'AZ' and ad_date < mdy(1,1,1990) then delete;
  *Write out e-codes for early years;
   array icd{25} dx1-dx25;
   array e_dx{8} $5. e_dx1-e_dx8;
   array ec{8}  ECODE1-ECODE8;
   i = 1;
   j = 1;
   do until (i > 25);
      if file_year < 2003 and substr(icd{i},1,1) = 'E' then do;
         e_dx{j} = icd{i};
		 j = j + 1;
      end;
      if icd{i} = "" then i = 99;*Exit loop;
      i+ +1; *Increment loop;
   end;
  *Need to do this in two steps otherwise it retains the E code across rows - not sure why it is retaining it;
   if file_year < 2003 then do;
      do i = 1 to 8;
          ec{i} = e_dx{i};
      end;
   end;
  *Code up alcohol mention;
   alcohol_mention = 0;
   alcohol_mention_oth = 0;
   i = 2; *Start with second ICD code;
   do until (i > 25);
      if substr(icd{i},1,3) in ('291','303') or substr(icd{i},1,4) = '3050' then alcohol_mention = 1; 
	 *These are other minor mentions of alcohol;
      if substr(icd{i},1,4) in ("3575", "4255", "5353", "5710", "5711", "5712", "5713", "7903", "9800", "V113", "V791" )  then alcohol_mention_oth = 1; 
      if icd{i} = '' then i = 99; *Exit loop;
		 i+ +1;
   end;  
   alcohol = 0;
   if substr(DX1,1,3) in ('291','303') or substr(DX1,1,4) = '3050' then alcohol = 1; 
  *Overall injury category for regressions;
   if ECODE1 ne '' then injury = 1; else injury = 0;
   *Source and location of injury;
   inj_by_self_e = 0;
   inj_by_oth_e = 0;
   place = "       ";
  *Code up alcohol or drugs any mention;
   i = 1;
   do until (i > 8);
     *There are very few people that are coded as both (175) and going over all the ecodes only adds a few percent to the number coded; 
      if substr(ec{i},2,2) = '95' then inj_by_self_e = 1; 
      if substr(ec{i},2,2) = '96' then inj_by_oth_e = 1;  
	 *Place often coded for assaults;
      if substr(ec{i},2,3) = '849' then place = ec{i};
      if substr(ec{i},2,4) in ('8600','8601') then alcohol_mention_oth = 1; 
      if ec{i} = '' then i = 99; *Exit loop;
		 i+ +1;
   end;
  *Create mutually exclusive categories precedence - alcohol,;
    if alcohol = 0 and inj_by_oth_e = 1 then inj_by_oth  = 1; else inj_by_oth = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 1 then inj_by_self  = 1; else inj_by_self = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 1 then inj_accident  = 1; else inj_accident = 0;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 0 and alcohol_mention = 1 then alcohol_on_dx = 1; else alcohol_on_dx = 0; *Alcohol not marked as primary cause but mentioned;
    if alcohol = 0 and inj_by_oth_e = 0 and  inj_by_self_e = 0 and  injury = 0 and alcohol_mention = 0 then illness = 1; else illness = 0; *Residual category;
  *Create combined categories;
   alcohol_any = alcohol + alcohol_on_dx;
   injury_or_alc =  inj_by_oth + inj_by_self + inj_accident + alcohol + alcohol_on_dx ;
  *Make sure the list is exhaustive;
	visit = alcohol + inj_by_self + inj_by_oth + inj_accident + alcohol_on_dx + illness;
  *Flag for insurance;
   if pay1 = 2	then Medicaid = 1; else Medicaid = 0;
   if pay1 = 3	then Private = 1; else Private = 0;
   if pay1 = 4	then Self_pay = 1; else Self_pay = 0;
   if pay1 not in (2,3,4) then other_ins = 1; else other_ins = 0;
   file_short = substr(file,1,5);
  *Code up female - need to do it this way as had female vs sex in underlying files;
   if male = 1 then female = 0;
   if male = 0 then female = 1;

   if TOTCHG = . then miss_chg = 1; else miss_chg = 0; 
   if female = . then miss_female = 1; else miss_female = 0; 
run;

*Clean up;
proc datasets;
   delete
       az_sidc_1990_core az_sidc_1991_core az_sidc_1992_core az_sidc_1993_core az_sidc_1994_core 
       Az_sidc_1995_core_ext Az_sidc_1996_core_ext az_sidc_1997_core_ext az_sidc_1998_core az_sidc_1999_core 
       az_sidc_2000_core az_sidc_2001_core az_sidc_2002_core az_sidc_2003_core az_sidc_2004_core 
       az_sidc_2005_core az_sidc_2006_core az_sidc_2007_core az_sidc_2008_core az_sidc_2009_core;
run;

*Direct list file back to lst window;
PROC PRINTTO PRINT=PRINT;
RUN; 

*Direct log file back to log window;
PROC PRINTTO LOG=LOG;
RUN; 
